import spacy
# Load the spaCy "zh_core_web_sm" pipeline once at import time; chunk_by_title
# below runs it on the input text and iterates the resulting sentences.
nlp = spacy.load("zh_core_web_sm")

def chunk_by_title(text, pipeline=None):
    """Split *text* into chunks, starting a new chunk at every "## " heading.

    The text is segmented into sentences, and consecutive sentences are
    joined with a single space. A sentence containing the marker "## "
    closes the chunk collected so far and becomes the first sentence of the
    next chunk. Sentences that precede the first heading form a chunk of
    their own, and the chunk still open when the input ends is emitted too.

    Args:
        text: The text to split.
        pipeline: Optional callable that takes the text and returns an
            object whose ``sents`` attribute iterates over sentence objects
            exposing ``text``. Defaults to the module-level ``nlp`` pipeline.

    Returns:
        A list of chunk strings in document order; empty when the pipeline
        yields no sentences.
    """
    if pipeline is None:
        pipeline = nlp

    chunks = []
    sentences = []  # sentences of the chunk currently being collected
    for sent in pipeline(text).sents:
        sentence = sent.text
        # Detect a second-level heading.
        # NOTE(review): this substring test also matches deeper headings
        # such as "### " and markers in the middle of a sentence.
        if "## " in sentence and sentences:
            chunks.append(" ".join(sentences))
            sentences = []
        sentences.append(sentence)

    # Flush the trailing chunk; without this the last section is dropped.
    if sentences:
        chunks.append(" ".join(sentences))
    return chunks